Code
# Load package(s)
library(tidyverse)
library(ggrepel)
library(patchwork)
library(cowplot)
# Load data
load("data/Aus_athletes.rda")
load("data/tech_stocks.rda")
load("data/titanic.rda")
load("data/blue_jays.rda")
Data Visualization (STAT 302)
The goal of this lab is to explore methods for annotating and positioning elements in ggplot2 plots. This lab also makes greater use of the scale_* functions, which are part of our next reading. In fact, students may find it useful to read through chapter 11, Colour scales and legends.
We’ll be using the blue_jays.rda, titanic.rda, Aus_athletes.rda, and tech_stocks.rda datasets.
# Load package(s)
library(tidyverse)
library(ggrepel)
library(patchwork)
library(cowplot)
# Load data
load("data/Aus_athletes.rda")
load("data/tech_stocks.rda")
load("data/titanic.rda")
load("data/blue_jays.rda")
Complete the following exercises.
Using the blue_jays.rda dataset, recreate the following graphic as precisely as possible.
Hints:
# label_info dataset that is a subset of original data, just with the 2 birds to be labeled
# Rows for the two birds that receive a text label.
label_info <- filter(blue_jays, BirdID %in% c("702-90567", "1142-05914"))

# Observed extremes of both axes; the in-panel title is anchored at the
# smallest Mass (left edge) and the largest Head (top edge).
y_range <- range(blue_jays$Head)
x_range <- range(blue_jays$Mass)

# Scatterplot of head length against body mass, coloured by KnownSex.
ggplot(blue_jays, aes(x = Mass, y = Head, color = KnownSex)) +
  geom_point(size = 2, alpha = 0.8, show.legend = FALSE) +
  # Label only the rows in label_info, shifted right so text clears the point.
  geom_text(
    data = label_info,
    mapping = aes(label = KnownSex),
    nudge_x = 0.5,
    show.legend = FALSE
  ) +
  # Title drawn inside the panel; hjust = 0 / vjust = 1 pin its top-left
  # corner to the anchor coordinates.
  annotate(
    "text",
    x = x_range[1],
    y = y_range[2],
    label = "Head length versus body mass for 123 blue jays",
    hjust = 0,
    vjust = 1,
    size = 4
  ) +
  labs(
    x = "Body mass (g)",
    y = "Head length (mm)"
  ) +
  theme_classic()
# Using the tech_stocks dataset, recreate the following graphics as precisely as possible. Use the column price_indexed.
Hints:
# label_info dataset that is a subset of original data, just containing the last day’s information for each of the 4 stocks
# Most recent row for each company. Sorting newest-first and keeping the first
# row per company does not hard-code the number of companies, still works if
# the companies end on different dates, and ungroup() guards against a grouped
# input. When every company shares the final date this yields the same rows,
# in the same order, as taking the first four rows of the sorted data.
label_info <- tech_stocks %>%
  ungroup() %>%
  arrange(desc(date)) %>%
  distinct(company, .keep_all = TRUE)

# Axis extremes used to anchor the in-panel title at the top-left corner.
xrnge <- range(tech_stocks$date)
yrnge <- range(tech_stocks$price_indexed)

# One line per company; the legend is replaced by direct text labels.
# The serif font is set as a constant on the text layers rather than inside
# aes(), which is meant for mappings to data columns.
ggplot(tech_stocks, aes(date, price_indexed, color = company)) +
  geom_line(show.legend = FALSE) +
  geom_text(
    data = label_info,
    aes(label = company),
    show.legend = FALSE,
    color = "black",
    family = "serif"
  ) +
  # Title inside the panel: top-left corner pinned to (first date, max price).
  annotate(
    "text", xrnge[1], yrnge[2],
    label = "Stock price over time for four major tech companies",
    vjust = 1, hjust = 0, size = 4, family = "serif"
  ) +
  labs(
    x = NULL,
    y = "Stock price, indexed"
  ) +
  theme_minimal()
# Hints:
ggrepel
# box.padding is 0.6
# Same line chart as the geom_text version, but the company labels are placed
# with ggrepel so they do not overlap each other. Relies on label_info, xrnge
# and yrnge defined for that earlier version.
ggplot(tech_stocks, aes(date, price_indexed, color = company)) +
  geom_line(show.legend = FALSE) +
  ggrepel::geom_text_repel(
    data = label_info,
    aes(label = company),
    show.legend = FALSE,
    color = "black",
    # Font set explicitly on the layer instead of being inherited from a
    # constant placed in the global aes().
    family = "serif",
    box.padding = 0.6,
    # 0 => always draw the connecting segment, however short.
    min.segment.length = 0,
    # Fixed seed so the repelled label positions are reproducible.
    seed = 9876,
    hjust = "right"
  ) +
  # Title inside the panel: top-left corner pinned to (first date, max price).
  annotate(
    "text", xrnge[1], yrnge[2],
    label = "Stock price over time for four major tech companies",
    vjust = 1, hjust = 0, size = 4, family = "serif"
  ) +
  labs(
    x = NULL,
    y = "Stock price, indexed"
  ) +
  theme_minimal()
# Using the titanic.rda dataset, recreate the following graphic as precisely as possible.
Hints:
# died and survived as levels/categories
# #D55E00D0, #0072B2D0 (no alpha is being used)
# Recode the 0/1 survived flag into a "died"/"survived" factor and drop the
# original column.
# NOTE(review): as_factor() orders levels by first appearance, so the level
# order (and therefore the facet row order after fct_rev()) depends on the row
# order of titanic -- confirm against the target graphic.
titanic1 <- titanic %>%
  mutate(
    mortality = ifelse(survived == 0, "died", "survived"),
    mortality = as_factor(mortality)
  ) %>%
  select(-survived)

# Fill colours; the trailing "D0" is the transparency encoded in the hex code
# itself, so no separate alpha setting is needed.
colors <- c("#D55E00D0", "#0072B2D0")

# Passenger counts by sex, one panel per (mortality, class) combination.
ggplot(titanic1, aes(x = sex, fill = sex)) +
  geom_bar(show.legend = FALSE) +
  facet_grid(rows = vars(fct_rev(mortality)), cols = vars(class)) +
  scale_fill_manual(values = colors) +
  labs(x = NULL) +
  theme_minimal()
# Use the athletes_dat dataset — extracted from Aus_athletes.rda — to recreate the following graphic as precisely as possible. Create the graphic twice: once using patchwork and once using cowplot.
# Sports that appear with BOTH sexes in Aus_athletes.
both_sports <- Aus_athletes %>%
  # one row per (sport, sex) combination present in the data
  distinct(sport, sex) %>%
  # how many different sexes each sport has
  count(sport, name = "n_sexes") %>%
  # keep sports where both sexes are present
  filter(n_sexes == 2) %>%
  # return the sport names as a plain vector
  pull(sport)
# Process data: restrict to the sports in both_sports and merge the two track
# events into a single "track" category.
athletes_dat <- Aus_athletes %>%
  filter(sport %in% both_sports) %>%
  mutate(
    # case_when will be very useful with shiny apps
    sport = case_when(
      sport %in% c("track (400m)", "track (sprint)") ~ "track",
      # every other sport keeps its name
      TRUE ~ sport
    )
  )
# Hints:
#D55E00D0 & #0072B2D0 — no alpha
#D55E00D0 & #0072B2D0 — no alpha
rcc is red blood cell count; wcc is white blood cell count
#D55E00 and #0072B2; shading #D55E0040 and #0072B240
c("female ", "male")
patchwork
#rename f/m -> female/male
# Relabel the sex levels: "f" becomes "female" and "m" becomes "male".
levels(athletes_dat[["sex"]]) <- list(female = "f", male = "m")

# Fill colours for the bar chart and the boxplot legend keys; the trailing
# "D0" is transparency encoded in the hex code.
colors <- c("#D55E00D0", "#0072B2D0")

# Number of athletes per sex, used as the text labels on the bars.
label1 <- athletes_dat %>%
  group_by(sex) %>%
  summarise(n = n())
# Bar chart: number of athletes per sex, with the count printed inside each bar.
# The fill is mapped to sex and coloured through a manual scale instead of
# passing the colour vector as a constant; a constant vector only works while
# the stat produces exactly one bar per colour, in the same order. The legend
# is suppressed, so the rendered chart is unchanged.
bar <- ggplot(athletes_dat, aes(sex)) +
  geom_bar(aes(fill = sex), show.legend = FALSE) +
  scale_fill_manual(values = colors) +
  ylim(0, 95) +
  theme_minimal() +
  labs(
    x = NULL,
    y = "number"
  ) +
  # Counts from label1, anchored at the bar top and nudged down into the bar.
  geom_text(
    data = label1,
    aes(label = n, y = n),
    vjust = "top",
    nudge_y = -5,
    size = 5
  )
# Scatterplot of white vs. red blood cell count, filled by sex.
# shape = 21 is a fillable circle, so each point gets a white outline (color)
# around the sex-coloured interior (fill).
scatter <- ggplot(athletes_dat, aes(rcc, wcc, fill = sex)) +
  geom_point(
    shape = 21,
    size = 3,
    color = "white",
    show.legend = FALSE
  ) +
  scale_fill_manual(values = c("#D55E00D0", "#0072B2D0")) +
  labs(
    x = "RBC count",
    y = "WBC count"
  ) +
  theme_minimal()
# Boxplots of percent body fat per sport, split by sex.
# Outlines use the opaque colours; box interiors use the "40" (light
# transparency) variants of the same colours.
box <- ggplot(athletes_dat, aes(x = sport, y = pcBfat, color = sex, fill = sex)) +
  geom_boxplot(width = 0.5) +
  scale_color_manual(
    values = c("#D55E00", "#0072B2"),
    labels = c("female ", "male")
  ) +
  scale_fill_manual(
    values = c("#D55E0040", "#0072B240"),
    labels = c("female ", "male")
  ) +
  labs(
    x = NULL,
    y = "% body fat",
    color = NULL,
    fill = NULL
  ) +
  # Draw the legend keys as solid squares in the bar-chart colours, without
  # an outline.
  guides(
    color = guide_legend(
      override.aes = list(fill = colors, color = NA)
    )
  ) +
  theme_minimal() +
  # Horizontal legend tucked into the top-right corner of the panel.
  # NOTE(review): a numeric legend.position is deprecated as of ggplot2 3.5.0
  # in favour of legend.position = "inside" plus legend.position.inside;
  # kept as-is so the code still runs on older ggplot2 versions.
  theme(
    legend.position = c(1, 1),
    legend.justification = c(1, 1),
    legend.direction = "horizontal"
  )
# Arrange plots with patchwork: bar and scatter side by side on top, the
# boxplot underneath.
(bar | scatter) / box
# cowplot
# Use cowplot::plot_grid() to combine them.
# Same layout with cowplot: build the top row first, then stack it on the
# boxplot in a single column.
top_row <- cowplot::plot_grid(bar, scatter)
cowplot::plot_grid(top_row, box, ncol = 1)
# Create the following graphic using patchwork.
# Hints:
# "A"
# Place the bar chart as an inset covering the right quarter and the bottom
# 45% of the scatterplot area, restyled with theme_classic().
inset <- inset_element(
  bar,
  left = 0.75,
  bottom = 0,
  right = 1,
  top = 0.45
) & theme_classic()

# Combine the scatterplot with the inset and tag the plots with capital letters.
scatter + inset + plot_annotation(tag_levels = "A")